1 /**
2  * Open-Addressed Hash Set
3  * Copyright: © 2015 Economic Modeling Specialists, Intl.
4  * Authors: Brian Schott
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
6  */
7 module containers.openhashset;
8 
9 private import containers.internal.hash;
10 private import containers.internal.node : shouldAddGCRange;
11 private import std.experimental.allocator.common : stateSize;
12 private import std.experimental.allocator.mallocator : Mallocator;
13 
14 /**
15  * Simple open-addressed hash set that uses linear probing to resolve sollisions.
16  *
17  * Params:
18  *     T = the element type of the hash set
19  *     Allocator = the allocator to use. Defaults to `Mallocator`.
20  *     hashFunction = the hash function to use
21  *     supportGC = if true, calls to GC.addRange and GC.removeRange will be used
22  *         to ensure that the GC does not accidentally free memory owned by this
23  *         container.
24  */
25 struct OpenHashSet(T, Allocator = Mallocator,
26 	alias hashFunction = generateHash!T, bool supportGC = shouldAddGCRange!T)
27 {
28 	/**
29 	 * Disallow copy construction
30 	 */
31 	this(this) @disable;
32 
33 	static if (stateSize!Allocator != 0)
34 	{
35 		this() @disable;
36 
37 		/**
38 		 * Use the given `allocator` for allocations.
39 		 */
40 		this(Allocator allocator)
41 		in
42 		{
43 			assert(allocator !is null, "Allocator must not be null");
44 		}
45 		do
46 		{
47 			this.allocator = allocator;
48 		}
49 
50 		/**
51 		 * Initializes the hash set with the given initial capacity.
52 		 *
53 		 * Params:
54 		 *     initialCapacity = the initial capacity for the hash set
55 		 */
56 		this(size_t initialCapacity, Allocator allocator)
57 		in
58 		{
59 			assert(allocator !is null, "Allocator must not be null");
60 			assert ((initialCapacity & initialCapacity - 1) == 0, "initialCapacity must be a power of 2");
61 		}
62 		do
63 		{
64 			this.allocator = allocator;
65 			initialize(initialCapacity);
66 		}
67 	}
68 	else
69 	{
70 		/**
71 		 * Initializes the hash set with the given initial capacity.
72 		 *
73 		 * Params:
74 		 *     initialCapacity = the initial capacity for the hash set
75 		 */
76 		this(size_t initialCapacity)
77 		in
78 		{
79 			assert ((initialCapacity & initialCapacity - 1) == 0, "initialCapacity must be a power of 2");
80 		}
81 		do
82 		{
83 			initialize(initialCapacity);
84 		}
85 	}
86 
87 	~this() nothrow
88 	{
89 		static if (useGC)
90 			GC.removeRange(nodes.ptr);
91 		allocator.deallocate(nodes);
92 	}
93 
94 	/**
95 	 * Removes all items from the hash set.
96 	 */
97 	void clear()
98 	{
99 		if (empty)
100 			return;
101 		foreach (ref node; nodes)
102 		{
103 			typeid(typeof(node)).destroy(&node);
104 			node.used = false;
105 		}
106 		_length = 0;
107 	}
108 
109 	///
110 	bool empty() const pure nothrow @nogc @safe @property
111 	{
112 		return _length == 0;
113 	}
114 
115 	///
116 	size_t length() const pure nothrow @nogc @safe @property
117 	{
118 		return _length;
119 	}
120 
121 	/**
122 	 * Returns:
123 	 *     $(B true) if the hash set contains the given item, false otherwise.
124 	 */
125 	bool contains(T item) const
126 	{
127 		if (empty)
128 			return false;
129 		immutable size_t hash = hashFunction(item);
130 		size_t index = toIndex(nodes, item, hash);
131 		if (index == size_t.max)
132 			return false;
133 		return nodes[index].hash == hash && nodes[index].data == item;
134 	}
135 
136 	/// ditto
137 	bool opBinaryRight(string op)(T item) inout if (op == "in")
138 	{
139 		return contains(item);
140 	}
141 
142 	/**
143 	 * Inserts the given item into the set.
144 	 *
145 	 * Returns:
146 	 *     $(B true) if the item was inserted, false if it was already present.
147 	 */
148 	bool insert(T item)
149 	{
150 		if (nodes.length == 0)
151 			initialize(DEFAULT_BUCKET_COUNT);
152 		immutable size_t hash = hashFunction(item);
153 		size_t index = toIndex(nodes, item, hash);
154 		if (index == size_t.max)
155 		{
156 			grow();
157 			index = toIndex(nodes, item, hash);
158 		}
159 		else if (nodes[index].used && nodes[index].hash == hash && nodes[index].data == item)
160 			return false;
161 		nodes[index].used = true;
162 		nodes[index].hash = hash;
163 		nodes[index].data = item;
164 		_length++;
165 		return true;
166 	}
167 
168 	/// ditto
169 	alias put = insert;
170 
171 	/// ditto
172 	alias insertAnywhere = insert;
173 
174 	/// ditto
175 	bool opOpAssign(string op)(T item) if (op == "~")
176 	{
177 		return insert(item);
178 	}
179 
180 	/**
181 	 * Params:
182 	 *     item = the item to remove
183 	 * Returns:
184 	 *     $(B true) if the item was removed, $(B false) if it was not present
185 	 */
186 	bool remove(T item)
187 	{
188 		if (empty)
189 			return false;
190 		immutable size_t hash = hashFunction(item);
191 		size_t index = toIndex(nodes, item, hash);
192 		if (index == size_t.max)
193 			return false;
194 		nodes[index].used = false;
195 		destroy(nodes[index].data);
196 		_length--;
197 		return true;
198 	}
199 
200 	/**
201 	 * Returns:
202 	 *     A range over the set.
203 	 */
204 	auto opSlice(this This)() nothrow pure @nogc @safe
205 	{
206 		return Range!(This)(nodes);
207 	}
208 
209 	mixin AllocatorState!Allocator;
210 
211 private:
212 
213 	import containers.internal.element_type : ContainerElementType;
214 	import containers.internal.mixins : AllocatorState;
215 	import containers.internal.storage_type : ContainerStorageType;
216 	import core.memory : GC;
217 
218 	enum bool useGC = supportGC && shouldAddGCRange!T;
219 
220 	static struct Range(ThisT)
221 	{
222 		ET front()
223 		{
224 			return cast(typeof(return)) nodes[index].data;
225 		}
226 
227 		bool empty() const pure nothrow @safe @nogc @property
228 		{
229 			return index >= nodes.length;
230 		}
231 
232 		void popFront() pure nothrow @safe @nogc
233 		{
234 			index++;
235 			while (index < nodes.length && !nodes[index].used)
236 				index++;
237 		}
238 
239 	private:
240 
241 		alias ET = ContainerElementType!(ThisT, T);
242 
243 		this(const Node[] nodes)
244 		{
245 			this.nodes = nodes;
246 			while (true)
247 			{
248 				if (index >= nodes.length || nodes[index].used)
249 					break;
250 				index++;
251 			}
252 		}
253 
254 		size_t index;
255 		const Node[] nodes;
256 	}
257 
258 	void grow()
259 	{
260 		immutable size_t newCapacity = nodes.length << 1;
261 		Node[] newNodes = (cast (Node*) allocator.allocate(newCapacity * Node.sizeof))
262 			[0 .. newCapacity];
263 		newNodes[] = Node.init;
264 		static if (useGC)
265 			GC.addRange(newNodes.ptr, newNodes.length * Node.sizeof, typeid(typeof(nodes)));
266 		foreach (ref node; nodes)
267 		{
268 			immutable size_t newIndex = toIndex(newNodes, node.data, node.hash);
269 			newNodes[newIndex] = node;
270 		}
271 		static if (useGC)
272 			GC.removeRange(nodes.ptr);
273 		allocator.deallocate(nodes);
274 		nodes = newNodes;
275 	}
276 
277 	void initialize(size_t nodeCount)
278 	{
279 		nodes = (cast (Node*) allocator.allocate(nodeCount * Node.sizeof))[0 .. nodeCount];
280 		static if (useGC)
281 			GC.addRange(nodes.ptr, nodes.length * Node.sizeof, typeid(typeof(nodes)));
282         nodes[] = Node.init;
283 		_length = 0;
284 	}
285 
286 	// Returns: size_t.max if the item was not found
287 	static size_t toIndex(const Node[] n, T item, size_t hash)
288 	{
289 		assert (n.length > 0, "Empty node");
290 		immutable size_t index = hashToIndex(hash, n.length);
291 		size_t i = index;
292 		immutable bucketMask = n.length - 1;
293 		while (n[i].used && n[i].data != item)
294 		{
295 			i = (i + 1) & bucketMask;
296 			if (i == index)
297 				return size_t.max;
298 		}
299 		return i;
300 	}
301 
302 	Node[] nodes;
303 	size_t _length;
304 
305 	static struct Node
306 	{
307 		ContainerStorageType!T data;
308 		bool used;
309 		size_t hash;
310 	}
311 }
312 
313 version(emsi_containers_unittest) unittest
314 {
315 	import std..string : format;
316 	import std.algorithm : equal, sort;
317 	import std.range : iota;
318 	import std.array : array;
319 	OpenHashSet!int ints;
320 	assert (ints.empty);
321 	assert (equal(ints[], cast(int[]) []));
322 	ints.clear();
323 	ints.insert(10);
324 	assert (!ints.empty);
325 	assert (ints.length == 1);
326 	assert (equal(ints[], [10]));
327 	assert (ints.contains(10));
328 	ints.clear();
329 	assert (ints.length == 0);
330 	assert (ints.empty);
331 	ints ~= 0;
332 	assert (!ints.empty);
333 	assert (ints.length == 1);
334 	assert (equal(ints[], [0]));
335 	ints.clear();
336 	assert (ints.length == 0);
337 	assert (ints.empty);
338 	foreach (i; 0 .. 100)
339 		ints ~= i;
340 	assert (ints.length == 100, "%d".format(ints.length));
341 	assert (!ints.empty);
342 	foreach (i; 0 .. 100)
343 		assert (i in ints);
344 	assert (equal(ints[].array().sort(), iota(0, 100)));
345 	assert (ints.insert(10) == false);
346 	auto ohs = OpenHashSet!int(8);
347 	assert (!ohs.remove(1000));
348 	assert (ohs.contains(99) == false);
349 	assert (ohs.insert(10) == true);
350 	assert (ohs.insert(10) == false);
351 	foreach (i; 0 .. 7)
352 		ohs.insert(i);
353 	assert (ohs.contains(6));
354 	assert (!ohs.contains(100));
355 	assert (!ohs.remove(9999));
356 	assert (ohs.remove(0));
357 	assert (ohs.remove(1));
358 }
359 
360 version(emsi_containers_unittest) unittest
361 {
362 	static class Foo
363 	{
364 		string name;
365 
366 		override bool opEquals(Object other) const @safe pure nothrow @nogc
367 		{
368 			Foo f = cast(Foo)other;
369 			return f !is null && f.name == this.name;
370 		}
371 	}
372 
373 	hash_t stringToHash(string str) @safe pure nothrow @nogc
374 	{
375 		hash_t hash = 5381;
376 		return hash;
377 	}
378 
379 	hash_t FooToHash(Foo e) pure @safe nothrow @nogc
380 	{
381 		return stringToHash(e.name);
382 	}
383 
384 	OpenHashSet!(Foo, Mallocator, FooToHash) hs;
385 	auto f = new Foo;
386 	hs.insert(f);
387 	assert(f in hs);
388 	auto r = hs[];
389 }